#delimit;
clear all;
set more off;

capture log using "04_Prepare_Grantee_NBER_and_AWS_data.log", replace;
/*******************************************************************************************/;
** INPUT: NBER data downloaded and combined with AWS data; 
** OUTPUT: Clean data set with grantee financials; 
** LAST MODIFIED: JAG, 7/10/2021;
** RUN TIME: <5 minutes using Stata 15 on computer with 2 x 2.60GHz processors and 64GB RAM;  													      	
/*******************************************************************************************/;

** NOTE: EXTRACTED FROM AWS 990 DATA FOR ADDITIONAL VARIABLES THAT ARE NOT INCLUDED IN NBER DATA;
** NOTE: AWS does not collect data from paper filings, so it covers fewer nonprofit organizations;
** SEE: https://registry.opendata.aws/irs990/;
use "../Raw Data/AWS/Form_990_AWS.dta", clear;
rename ein mein;
gen ein=substr(mein,1,2)+"-"+substr(mein,3,9);
gen l=length(ein);
assert l==10;
drop l mein;
bys ein year: keep if _n==1;
compress;
save "temp.dta", replace;

** OUR GOAL HERE IS TO HELP UNDERSTAND HOW INNOVATIVE DAFs MAY HELP CHANNEL MONEY, THUS WE WANT CHARACTERISTICS OF GRANTEES THEY GIVE TO;
** THIS IS 2013-2019 data which roughly represents fiscal year 2012-2018;
** SEE: https://www.nber.org/research/data/irs-form-990-data;
use "../Raw Data/NBER/NBER_990.dta", clear;

** NBER has EIN as a real # so need to modify to match the standard 9 digit string with "-" after second digit;
tostring ein, gen(stein);
rename ein rein;
gen l=length(stein);
tab l, m;
gen first1=substr(stein,1,1) if l==8;
gen remaining1=substr(stein,2,length(stein)) if l==8;
gen first2=substr(stein,1,2) if l==9;
gen remaining2=substr(stein,3,length(stein)) if l==9;
gen ein1="0"+first1+"-"+remaining1 if l==8;
gen ein2=first2+"-"+remaining2 if l==9;
gen ein=ein1;
replace ein=ein2 if ein=="";
drop first1 first2 remaining1 remaining2 ein1 ein2 stein l;
** This is intentional as sometimes has the month and sometimes does not;
gen year=floor(tax_pd/100);
tab year, m;
** Removing duplicate tax filings;
bys ein year: keep if _n==1;

** Merging in the variables from AWS related to tax form change;
merge 1:1 ein year using "temp.dta";
drop _m;

** FUNDRAISING -- Non-profits spend meaningful time and money soliciting funds, can DAFs improve efficiency?;
** This constructs fundraising variables by combining NBER and AWS data;
** NBER forms do not have the breakdown of total functional expenses to fundraising, administrative, etc...;
** Yet prior literature has shown the importance of these distinctions;
** Second not all fundraising expenses are created equal and their records are spread over several forms;
** Professional fundraising expenses may be 0 in that they didn't hire a professional, but they still had meaningful total fundraising expenses;
** There are also other relevant variables collected by NBER: grsincfundrsng (gross fundraising), lessdirfndrsng (fundraising expenses), netincfndrsng (fundraising income);
** Yet these NBER variables are only for "events" with greater than $5000 net income and so may ignore meaningful fundraising activity for smaller, grassroots non-profits;
** Finally, deferedrevenued is "fundraising events" revenue which typically exceeds grsincfundrsng because it is inclusive of the variety of fundraising activities;

foreach var in tot_func_exp_fund profndraising grsincfndrsng lessdirfndrsng netincfndrsng deferedrevnuend {;
replace `var'=0 if `var'==.;
replace `var'=0 if `var'<0;
};
** Note only net income from fundraising events can be negative;
count if grsincfndrsng<0;
foreach var in tot_func_exp_fund profndraising grsincfndrsng lessdirfndrsng deferedrevnuend {;
replace `var'=0 if `var'<0;
};
** I have looked at examples and see that they are listed as negative revenue but they don't seem to make sense;
sum deferedrevnuend, d;
sum grsincfndrsng, d;
egen gr_frev=rowmax(deferedrevnuend grsincfndrsng);
replace gr_frev=0 if gr_frev<0;

sum gr_frev, d;
local p75=r(p75);
local p90=r(p90);
gen gr_mjr75_frev=(gr_frev>=`p75');
gen gr_mjr90_frev=(gr_frev>=`p90');
gen gr_fexp=tot_func_exp_fund+profndraising;
gen gr_ind_f=(gr_fexp!=0);
tab gr_ind_f, m;
gen gr_f2exp=(gr_fexp/tot_func_exp_tot);
replace gr_f2exp=0 if gr_f2exp==.;
gen gr_f2contr=(gr_fexp/tot_contrib);
replace gr_f2contr=0 if gr_f2contr==.;
gen gr_pct_fevents=grsincfndrsng/gr_frev;
replace gr_pct_fevents=0 if gr_pct_fevents==.;
gen gr_mgn_fevents=netincfndrsng/grsincfndrsng;
replace gr_mgn_fevents=0 if gr_mgn_fevents==.;
drop netincfndrsng grsincfndrsng profndraising deferedrevnuend;

** Winsorizing;
foreach var in gr_mgn_fevents gr_pct_fevents gr_f2exp gr_f2contr{;
	winsor `var', gen(temp) p(0.02);
	drop `var'; 
	rename temp `var';
};

** Label fundraising variables;
label var gr_frev "Grantee Fundraising Revenue";
label var gr_mjr75_frev "Grantee in Top Quartile of Fundraising";
label var gr_mjr90_frev "Grantee in Top Decile of Fundraising";
label var gr_fexp "Grantee Fundraising Expense (functional+professional)";
label var gr_ind_f "Ind. Grantee Fundraises";
label var gr_f2exp "Grantee Fundraising Expenses to Total Expenses"; 
label var gr_f2contr "Grantee Fundraising Efficiency Fund Exp./Total Contr."; 
label var gr_pct_fevents "Grantee Pct. Revenue from Fundraising Events"; 
label var gr_mgn_fevents "Grantee Margin from Fundraising Events"; 
sum gr_*;

** NOTE: NBER DOES NOT PULL ANY OF THE GOVERNANCE, MGMT, OR DISCLOSURE INFORMATION FOR 501(c)3s;
** TO GET THIS INFO ONE WOULD NEED TO USE AWS OR REQUEST FROM GUIDESTAR;
** The available data is total compensation for key employees (Part VII Sec A);
foreach var in totreprtabled totcomprelatede totestcompf {;
replace `var'=0 if `var'==.;
};
gen gr_tot_ecomp=totreprtabled+totcomprelatede+totestcompf;
gen gr_highest_ecomp=highest_comp;
gen gr_100k_comp=noindiv100kcnt;
gen gr_100k_contr=nocontractor100kcnt;

label var gr_tot_ecomp "Grantee Total Executive Compensation";
label var gr_highest_ecomp "Grantee Highest Compensated Individual";
label var gr_100k_comp "# officers paid 100k+";
label var gr_100k_contr "# contractors paid 100k+";


** NOW CREATING MEASURES OF PROFITABILITY AND LIQUIDITY;
** Baber, Daniel, and Roberts find that administrative efficiency is program expenses scaled by total expenses and is an important CG measure;
** The idea being that perhaps they find charities with better historical metrics;
gen gr_adm_eff=tot_func_exp_prog/tot_func_exp_tot;
label var gr_adm_eff "Grantee administrative efficiency (1=all expenses programatic)";
gen gr_mgmt_eff=tot_func_exp_adm/(tot_func_exp_adm+tot_func_exp_fund);
label var gr_mgmt_eff "Grantee mgmt efficiency (1=mgmt expense, 0=fundraising exp)";

** are they profitable revenue less expenses;
** totrevenue = total revenue (Part VIII, line 12(A));
** totfuncexpns = total expenses (Part IX, line 25(A)), again note tot_func_exp_tot is the one pulled from AWS;
gen gr_net_prof=totrevenue-tot_func_exp_tot;
gen gr_prof_ind=(gr_net_prof>=0);
gen gr_prof_a=gr_net_prof/totassetsend;

** Unrestricted surplus is commonly used measure of profitability for nonprofits;
** Balance Sheet, Line 27B (Unrestricted net assets, end of year);
sort ein year;
bys ein: gen gr_prof_alt=(unrstrctnetasstsend-unrstrctnetasstsend[_n-1])/unrstrctnetasstsend[_n-1];
sum gr_prof_a gr_prof_alt, d; 

foreach var in gr_prof_a gr_prof_alt {;
	winsor `var', gen(temp) p(0.02);
	drop `var'; 
	rename temp `var';
};

** Is the grantee a commercial nonprofit? This is the case when service revenue / total revenue is in top quartile;
gen gr_pct_serv_rev=totprgmrevnue/totrevenue;
sum gr_pct_serv_rev, d;
local p75=r(p75);
gen gr_comm_np=(gr_pct_serv_rev>=`p75');
tab gr_comm_np, m;
label var gr_comm_np "Grantee Commercial Non-profit";

** Is the grantee reliant on gifts?  This is the case when totcontrbgfts / total revenue is in top quartile;;
gen gr_pct_gift_rev=totcntrbgfts/totrevenue;
sum gr_pct_gift_rev, d;
local p75=r(p75);
gen gr_gift_dep=(gr_pct_gift_rev>=`p75');
tab gr_gift_dep, m;
label var gr_gift_dep "Grantee Gift Dependent";

** Do they use an auditor;
gen gr_uses_auditor=(sepindaudfinstmtcd=="Y");
tab gr_uses_auditor, m;

** Age of grantee;
gen gr_age=year-formation_year;
bys ein: egen min_year=min(year);
replace gr_age=year-min_year if gr_age==.;
assert gr_age!=.;
gen gr_lage=log(1+gr_age);

** LIQUIDITY;
** [Balance Sheet, Line 1B (Cash = nonintcashend) + Line 2B (Savings = svngstempinvend)] divided by;
** [(Line 25A (Total functional expenses = totfuncexpns) - Line 22A (Depreciation = deprcatndepletn)) / 12 (months)];
gen gr_mths_liquid=((nonintcashend+svngstempinvend)/((totfuncexpns-deprcatndepletn)/12));

** LUNA (liquid unrestricted net assets) is common measure of financial flexibility and risk tolderance;
** [Line 27B (Unrestricted net assets = unrstrctnetasstsend) - Line 10c (Land, buildings, and equipment = lndbldgsequipend) - Line 23B (Secured mortgages = secrdmrtgsend) - Line 20B (Tax-exempt bond)] ;
** divided by [(Line 25A (Total functional expenses) - Line 22A (Depreciation)) / 12 (months)];
* NOTE: When net equity in PPE is negative, it is excluded from the calculation of months of unrestricted liquid net assets;
replace lndbldgsequipend=0 if lndbldgsequipend==.;
** Switching PPE to 0 when it is negative;
replace lndbldgsequipend=0 if lndbldgsequipend<=0;
gen gr_mths_luna=((unrstrctnetasstsend-lndbldgsequipend-secrdmrtgsend-txexmptbndsend)/((totfuncexpns-deprcatndepletn)/12));

** CALCULATE VOLATILITY OF CONTRIBUTIONS;
sort ein year;
rename totcntrbgfts contrib_rev;
	bys ein: gen lag_contrib_rev=contrib_rev[_n-1] if year-1==year[_n-1];
	bys ein: gen lag2_contrib_rev=contrib_rev[_n-2] if year-2==year[_n-2];
egen gr_volat=rowsd(contrib_rev lag_contrib_rev lag2_contrib_rev);

** POOR GOVERNANCE;
gen bgov1=(grantoofficercd=="Y");
gen bgov2=(dirbusnreltdcd=="Y");
gen bgov3=(fmlybusnreltdcd=="Y");
gen bgov4=(servasofficercd=="Y");
gen bgov5=(gr_100k_comp>0 & gr_100k_comp!=.);
gen bgov6=(gr_100k_contr>0 & gr_100k_contr!=.);
gen bgov_tot=bgov1+bgov2+bgov3+bgov4+bgov5+bgov6;
egen bgov_max=rowmax(bgov1 bgov2 bgov3 bgov4 bgov5 bgov6);
sort ein year;

** OVERALL SIZE;
sort ein year;
gen gr_tot_asset=totassetsend;
gen gr_unr_asset=unrstrctnetasstsend;

foreach var in gr_mgmt_eff gr_adm_eff gr_age gr_lage gr_frev gr_fexp gr_ind_f gr_f2exp 
gr_f2contr gr_pct_fevents gr_mgn_fevent gr_tot_asset gr_unr_asset
gr_mjr75_frev gr_mjr90_frev gr_tot_ecomp gr_highest_ecomp gr_100k_comp gr_100k_contr
gr_mths_luna gr_mths_liquid gr_gift_dep gr_comm_np gr_prof_a gr_prof_alt gr_prof_ind 
gr_uses_auditor gr_volat bgov_tot bgov_max bgov1 bgov2 bgov3 bgov4 bgov5 bgov6{;
	bys ein: gen lag_`var'=`var'[_n-1] if year-1==year[_n-1];
};

keep ein year gr_mgmt_eff gr_adm_eff gr_age gr_lage gr_frev gr_fexp gr_ind_f gr_f2exp 
gr_f2contr gr_pct_fevents gr_mgn_fevent gr_tot_asset gr_unr_asset
gr_mjr75_frev gr_mjr90_frev gr_tot_ecomp gr_highest_ecomp gr_100k_comp gr_100k_contr
gr_mths_luna gr_mths_liquid gr_gift_dep gr_comm_np gr_prof_a gr_prof_alt gr_prof_ind 
gr_uses_auditor gr_volat bgov_tot bgov_max bgov1 bgov2 bgov3 bgov4 bgov5 bgov6 lag_*;
rename ein gr_ein;

save "NBER Grantee for merge.dta", replace;

